**Introduction to Web Scraping**

Web scraping is a technique for converting data that sits on the web in an unstructured format (HTML tags) into a structured format that can easily be accessed and used. Much of the data available on the web is not readily usable: it is embedded in HTML and is not directly downloadable, so some knowledge and expertise are required to extract it.
We can locate useful data based on CSS selectors, especially when the webpage uses semantic tag attributes. We can use [SelectorGadget](http://selectorgadget.com/) to find out which CSS selector matches the reviews. SelectorGadget can be added as an extension in Google Chrome, where it appears as a magnifying-glass icon.
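As a minimal sketch of this workflow, rvest can parse an HTML string directly; the `.review` class below is a made-up stand-in for whatever selector SelectorGadget reports:
library(rvest)
html <- read_html('<div class="review">Great benefits</div><div class="review">Long hours</div>')
html %>% html_nodes(".review") %>% html_text()
#> [1] "Great benefits" "Long hours"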
# pacman loads the listed packages, installing any that are missing
pacman::p_load(tidyverse, tidytext, viridis, rvest, tm, wordcloud, SnowballC, tidyquant, ggridges, scales, highcharter)
We can specify the CSS selector in html_nodes() and extract the text with html_text(). We scrape over 1,800 reviews of Fiat Chrysler Automobiles from Glassdoor, spread across about 155 web pages.
# the reviews span 155 pages, thus n = 155
n <- 155
FCA_urls <- paste0("https://www.glassdoor.com/Reviews/FCA-Fiat-Chrysler-Automobiles-Reviews-E149_P", seq(2, n), ".htm")
FCA_urls <- c("https://www.glassdoor.com/Reviews/FCA-Fiat-Chrysler-Automobiles-Reviews-E149.htm", FCA_urls)
FCA_html <- FCA_urls %>%
  map_chr(~ read_html(.x) %>% html_node(".hreview") %>% html_text())
FCA_html[[1]]
We can now remove unwanted characters from the scraped text.
# data pre-processing: remove newline characters
FCA_html <- gsub("\n", "", FCA_html)
# remove all round brackets
FCA_html <- FCA_html %>% str_replace_all("\\(|\\)", "")
# remove all backslashes
FCA_html <- FCA_html %>% str_replace_all("\\\\", "")
# remove all bullet characters
FCA_html <- FCA_html %>% str_replace_all("\\• ", "")
# remove all " & " separators
FCA_html <- FCA_html %>% str_replace_all(" & ", "")
# remove all non-printable characters
FCA_html <- FCA_html %>% str_replace_all("[^[:print:]]", "")
# remove all double quotes
FCA_html <- FCA_html %>% str_replace_all(pattern = "\"", replacement = "")
# regex pattern for ISO-style dates (e.g. 2017-05-12) embedded in each review
pattern <- "\\(?\\d{4}\\)?[.-]? *\\d{2}[.-]? *[.-]?\\d{2}"
date <- FCA_html %>% str_extract_all(pattern)
Date <- as.Date(unlist(date))
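As a quick sanity check, the pattern picks out ISO-style dates from free text; the review string below is made up for illustration:
str_extract("Current Employee - 2017-05-12 Great benefits", pattern)
#> [1] "2017-05-12"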
Before joining the reviews with a lexicon, we can check how many words the NRC lexicon assigns to each of its ten sentiment categories:
get_sentiments(lexicon = "nrc") %>%
  count(sentiment, sort = TRUE)
Next, we convert the text data to a data frame, one row per page, sorted by review date.
GlassdoorPages <- data_frame(date = Date, page = seq(1, n),
                             text = FCA_html) %>% arrange(desc(date))
GlassdoorPages%>%head(5)
GlassdoorPages%>%tail(5)
Now we have the reviews and can convert them to a tidy text format.
tidy_FCA <- GlassdoorPages %>%
unnest_tokens(word, text) %>%
add_count(date) %>%
dplyr::rename(date_total = n)
# remove standard English stop words
data("stop_words")
tidy_FCA <- tidy_FCA %>%
  anti_join(stop_words, by = "word")
# remove domain-specific noise words (company names and share-widget residue)
stop_word <- data_frame(word = c("chrysler", "fca", "linklink", "fiat", "whatsappshar", "auburn", "twittershar"))
tidy_FCA <- tidy_FCA %>%
  anti_join(stop_word, by = "word")
tidy_FCA%>%head()
Next, let’s implement the sentiment analysis.
FCA_sentiment <- tidy_FCA %>%
inner_join(get_sentiments("nrc"))
FCA_sentiment%>%head()
Now we have all we need to see the relative changes in these sentiments over the years.
theme_set(theme_bw())
#Alternatively
#FCA_sentiment%>%group_by(page, page_total, sentiment)%>%count()
FCA_sentiment %>%
count(date, date_total, sentiment) %>%
filter(sentiment %in% c("positive", "negative",
"joy", "trust","fear","sadness"))%>%
mutate(sentiment = as.factor(sentiment)) %>%
#ggplot(aes(page, n / page_total, fill = sentiment)) +
ggplot(aes(date, n / sum(n), fill = sentiment)) +
geom_area(position = "identity", alpha = 0.5) +
labs(y = "Relative frequency", x = "Year",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc lexicon")+theme_bw()+
scale_fill_manual(values=viridis_pal(option = "D")(6))+
scale_y_continuous(labels = scales::percent)
FCA_sentiment %>%
count(date, date_total, sentiment) %>%
filter(sentiment %in% c("positive", "negative",
"joy", "trust","fear","sadness"))%>%
mutate(sentiment = as.factor(sentiment)) %>%
#ggplot(aes(page, n / page_total, fill = sentiment)) +
ggplot(aes(x=date,y= n / sum(n), fill = sentiment,height=n / sum(n),group=sentiment)) +
geom_ridgeline_gradient() +
labs(y = "Relative frequency", x = "Year",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc lexicon")+theme_bw()+
scale_fill_viridis(discrete = TRUE, direction = -1) +
scale_y_continuous(labels = scales::percent)
From the beginning of 2008 to the end of 2013, positive sentiment outweighed negative sentiment, and the level of trust expressed in FCA was higher than the sadness expressed by reviewers. From 2014 onward, positive/trust sentiment no longer overwhelmingly dominates negative/sadness sentiment; in general, positive sentiment has declined since 2014.
FCA_sentiment %>%
count(date, date_total, sentiment) %>%
# filter(sentiment %in% c("positive", "negative", "joy", "trust","fear","sadness"))%>%
mutate(sentiment = forcats::fct_lump(sentiment, 6))%>%
#mutate(sentiment = as.factor(sentiment)) %>%
ggplot(aes(date, n / date_total, fill = sentiment)) +
#ggplot(aes(page, n / sum(n), fill = sentiment)) +
geom_area(position = "identity", alpha = 0.5) +
labs(y = "Relative frequency", x = "Year",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc lexicon")+theme_bw()+
scale_fill_manual(values=viridis_pal(option = "A")(7))+
scale_y_continuous(labels = scales::percent)
tidy_FCA %>%
inner_join(get_sentiments("afinn")) %>%
group_by(date) %>%
summarize(average_sentiment = mean(score), words = n()) %>%
#filter(words >= 10) %>%
ggplot(aes(date, average_sentiment)) +
geom_line() +
geom_hline(color = "red", lty = 2, yintercept = 0) +
labs(y = "Average AFINN sentiment score", x = "Year",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the affin lexicon")+
geom_smooth(method = "loess")
The average sentiment was positive in the first half of 2008. From mid-2008 to the last quarter of 2009 there was a downward trend in the average sentiment expressed by reviewers, which can be attributed to the global financial crisis that began around that time. The most positive reviews occurred in October 2015 and April 2016, whereas the most negative sentiment was expressed in August 2016.
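For intuition, the AFINN lexicon assigns each word an integer score between -5 and +5, and the plot above tracks the per-date mean of those scores. A minimal sketch on a made-up sentence (only words present in the lexicon contribute, e.g. "great" scores +3 and "horrible" -3):
data_frame(text = "great benefits but horrible management") %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("afinn"), by = "word") %>%
  summarize(average_sentiment = mean(score))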
ldat=tidy_FCA %>%
inner_join(get_sentiments("afinn")) %>%
group_by(date) %>%
summarize(average_sentiment = mean(score), words = n())
highchart() %>%
hc_title(text = "Sentiment analysis of FCA Glassdoor Reviews")%>%
hc_add_series_times_values(ldat$date, ldat$average_sentiment,
name = "Year",color="#440154FF")%>%
hc_yAxis(title = list(text = "Average AFINN sentiment score"),labels = list(format = "{value}"), max = 4,min=-4,plotLines = list(
list(label = list(text = ""),
color = "#35B779FF",
width = 2,
value = 0)))
highchart(type = "stock") %>%
hc_title(text = "Sentiment analysis of FCA Glassdoor Reviews") %>%
hc_subtitle(text = "") %>%
hc_tooltip(valueDecimals = 2) %>%
hc_add_series_times_values(ldat$date, ldat$average_sentiment,
name = "",color="#440154FF")%>%
hc_add_theme(hc_theme_gridlight())%>%
hc_yAxis(title = list(text = "Average AFINN sentiment score"),labels = list(format = "{value}"), max = 4,min=-4,plotLines = list(
list(label = list(text = ""),
color = "red",
width = 2,
value = 0)))
tidy_FCA %>%
inner_join(get_sentiments("afinn")) %>%
group_by(date) %>%
summarize(average_sentiment = mean(score), words = n()) %>%
# filter(words >= 5) %>%
ggplot(aes(date, average_sentiment)) +
geom_line( )+
theme_minimal()+
geom_ridgeline_gradient(aes(y=0,height=average_sentiment,fill=average_sentiment),min_height=-3.5)+
scale_fill_viridis(option="C",limit=c(-3.5,4))+
#geom_line() +
geom_hline(color = "red", lty = 2, yintercept = 0) +
labs(y = "Average AFINN sentiment score", x = "Page",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the affin lexicon")
FCA_sentiment %>%
count(sentiment, word) %>%
filter(sentiment %in% c("positive", "negative",
"joy", "trust","fear","sadness")) %>%
group_by(sentiment) %>%
top_n(10) %>%
ungroup %>%
mutate(word = reorder(word, n)) %>%
mutate(sentiment = as.factor(sentiment)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_bar(alpha = 0.8, show.legend = FALSE,stat = "identity") +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
facet_wrap(~sentiment, scales = "free") +
labs(y = "Total number of occurrences", x = "",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc lexicon")+theme_bw()+
scale_fill_manual(values=viridis_pal(option = "D")(6))
FCA_sentiment %>%
count(date, date_total, sentiment) %>%
filter(sentiment %in% c("positive", "negative",
"joy", "trust","fear","sadness"))%>%
mutate(sentiment = factor(sentiment, levels = c("negative",
"positive",
"joy", "trust","fear","sadness"))) %>%
ggplot(aes(date, n / date_total, fill = sentiment)) +
geom_area(position = "identity", alpha = 0.5) +
labs(y = "Relative frequency", x = NULL,
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc")+theme_bw()
FCA_sentiment <- tidy_FCA %>%
inner_join(get_sentiments("bing"))
FCA_sentiment %>%
count(date, date_total, sentiment)%>%
mutate(sentiment = as.factor(sentiment))%>%
ggplot(aes(date, n / date_total, fill = sentiment)) +
geom_area(position = "identity", alpha = 0.5) +
labs(y = "Relative frequency", x = "Page",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the nrc")+theme_bw()+
# scale_fill_manual(values=viridis_pal(option = "plasma")(2))+
scale_y_continuous(labels = scales::percent)
The distributions of negative and positive sentiment are similar, with negative sentiment showing a higher peak. Negative reviews are spread across dates much like positive ones; neither clearly dominates the other.
GlassdoorPages %>%
unnest_tokens(word, text)%>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE)%>%
spread(sentiment,n,fill=0)%>%
mutate(sentiment = positive -negative)%>%
ggplot(aes(x = sentiment)) +
geom_density(color = palette_light()[1], fill = palette_light()[1], alpha = 0.8) +
theme_tq()+xlim(c(-5,5))
den=GlassdoorPages %>%
unnest_tokens(word, text)%>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE)%>%
spread(sentiment,n,fill=0)%>%
mutate(sentiment = positive -negative)
hchart(density(den$sentiment), type = "area", color =viridis_pal()(1), name = "Sentiment")%>%
hc_xAxis(min = -5, max =5)%>%
hc_yAxis(title = list(text = "density"),labels = list(format = "{value}"))
The most common positive and negative words are visualized below.
FCA_sentiment %>%
count(sentiment, word) %>%
group_by(sentiment) %>%
top_n(15) %>%
ungroup %>%
mutate(word = reorder(word, n)) %>%
mutate(sentiment = as.factor(sentiment)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_col(alpha = 0.8, show.legend = FALSE) +
coord_flip() +
scale_y_continuous(expand = c(0,0)) +
facet_wrap(~sentiment,scales="free") +
labs(y = "Total number of occurrences", x = "",
title = "Sentiment analysis of FCA Glassdoor Reviews",
subtitle = "Using the bing lexicon")+
#scale_fill_manual(values=viridis_pal(option = "D")(8))+
scale_fill_viridis(end = 0.75, discrete=TRUE, direction = -1) +
scale_x_discrete(expand=c(0.02,0)) +
theme_minimal(base_size = 13) +
  # left-align and italicize the facet labels
  theme(strip.text = element_text(hjust = 0, face = "italic")) +
  # strip the horizontal axis title, ticks, and labels
  theme(axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())
The counts of the most common positive and negative words are displayed graphically below.
bing_word_counts <-tidy_FCA %>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
ungroup()
bing_word_counts%>%spread(sentiment,n,fill = 0)%>%top_n(10)
bing_word_counts%>%spread(sentiment,n,fill = 0)%>%top_n(-10)%>%head(10)
bing_word_counts %>%
filter(n > 3) %>%
mutate(n = if_else(sentiment == "negative", -n, n)) %>%
mutate(word = reorder(word, n)) %>%
ggplot(aes(word, n, fill = sentiment)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Contribution to sentiment",title="bing sentiments")+
#scale_fill_manual(values=viridis_pal(option = "D")(2))
scale_fill_viridis(end = 0.85, discrete=TRUE, direction = 1)
# Alternative approach: scrape page-by-page with a for loop instead of map_chr()
# n = 4
# FCA = list()
# FCA[[1]] = read_html("https://www.glassdoor.com/Reviews/FCA-Fiat-Chrysler-Automobiles-Reviews-E149.htm") %>% html_nodes(".hreview") %>% html_text(trim = TRUE)
# for (i in 2:n){
#   FCA[[i]] = read_html(paste0("https://www.glassdoor.com/Reviews/FCA-Fiat-Chrysler-Automobiles-Reviews-E149_P", i, ".htm")) %>% html_nodes(".hreview") %>% html_text(trim = TRUE)
# }
corpus <- Corpus(VectorSource(FCA_html))
# tolower is a base function, so wrap it in content_transformer() for tm_map
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removeWords, stopwords("english"))
# domain-specific stems and share-widget residue to drop
stop_user <- c("chrysler", "fca", "linklink", "fiat", "whatsappshar", "auburn", "twittershar", "automobil", "edit", "delet", "via", "starstarstarstarstarwork", "pdt", "hill", "facebookshar")
corpus <- tm_map(corpus, removeWords, stop_user)
tdm <- TermDocumentMatrix(corpus,
control = list(removePunctuation = TRUE,
stopwords = TRUE,
removeNumbers = TRUE, tolower = TRUE,
PlainTextDocument=TRUE,
stripWhitespace=TRUE, stemming = TRUE))
inspect(tdm)
<<TermDocumentMatrix (terms: 2240, documents: 155)>>
Non-/sparse entries: 9692/337508
Sparsity : 97%
Maximal term length: 39
Weighting : term frequency (tf)
Sample :
Docs
Terms 113 116 27 29 43 48 68 81 84 87
ago 2 2 1 1 1 1 1 1 1 2
automobil 2 2 3 3 2 3 2 3 3 3
copi 1 1 1 1 1 1 1 1 1 1
delet 1 1 1 1 1 1 1 1 1 1
emailcopi 1 1 1 1 1 1 1 1 1 1
employe 3 2 4 2 6 3 3 2 2 3
life 1 3 2 1 1 2 1 1 1 2
respons 1 1 1 1 1 1 1 1 1 1
second 1 1 2 1 1 1 1 1 1 1
work 4 2 8 2 3 6 2 1 3 3
tidy(tdm)
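tidy() from tidytext flattens the term-document matrix into one row per (term, document) pair, which makes it easy to work with dplyr verbs; for instance, a sketch of the total count per stem:
tidy(tdm) %>%
  group_by(term) %>%
  summarize(total = sum(count)) %>%
  arrange(desc(total))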
tdm = as.matrix(tdm)
frequencies = DocumentTermMatrix(corpus)
frequencies
<<DocumentTermMatrix (documents: 155, terms: 2749)>>
Non-/sparse entries: 9971/416124
Sparsity : 98%
Maximal term length: 39
Weighting : term frequency (tf)
findFreqTerms(frequencies, lowfreq=100)
[1] "ago" "automobiles"
[3] "copied" "delete"
[5] "emailcopy" "employee"
[7] "facebookshare" "flag"
[9] "inappropriateflag" "inappropriatehelpful"
[11] "response" "seconds"
[13] "time" "twittershare"
[15] "whatsappshare" "work"
[17] "anonymous" "balanceculturevaluescareer"
[19] "life" "opportunitiescompbenefitssenior"
Next we remove sparse terms: removeSparseTerms(frequencies, 0.995) keeps only terms that appear in at least 0.5% of the documents, i.e. drops terms with sparsity above 99.5%.
sparse = removeSparseTerms(frequencies, 0.995)
sparse
<<DocumentTermMatrix (documents: 155, terms: 2749)>>
Non-/sparse entries: 9971/416124
Sparsity : 98%
Maximal term length: 39
Weighting : term frequency (tf)
At the 0.995 threshold no terms are actually dropped here (the matrix dimensions are unchanged). What about associations between words? Let's look at which other words had a high correlation with “love”, “poor”, “flexible”, and “horrible”.
findAssocs(frequencies, c("love","poor","flexible","horrible"), c(0.6,0.6,0.6,0.6))
$love
dayton featured
0.89 0.89
fieldconsno mechanicstarstarstarstarstarcurrent
0.89 0.89
ohi retirementadvice
0.89 0.89
taught workshare
0.89 0.89
yearsprospros contained
0.89 0.63
folk plan
0.63 0.63
review staying
0.63 0.63
technician technology
0.63 0.63
$poor
seen advances allowed
0.63 0.60 0.60
cheapest choice clue
0.60 0.60 0.60
creation currect decides
0.60 0.60 0.60
developed drained efforts
0.60 0.60 0.60
energy ethics executed
0.60 0.60 0.60
executive exited experiences
0.60 0.60 0.60
fantastic feature feedback
0.60 0.60 0.60
final flounder gouge
0.60 0.60 0.60
hammers highlighted incorporate
0.60 0.60 0.60
interest items labor
0.60 0.60 0.60
merger mirrors outstanding
0.60 0.60 0.60
participate partners price
0.60 0.60 0.60
questionable reduce requested
0.60 0.60 0.60
shown sighted sink
0.60 0.60 0.60
slogans smoke stimulate
0.60 0.60 0.60
swimstarstarstarstarstarwork talked tco
0.60 0.60 0.60
viability view washington
0.60 0.60 0.60
$flexible
numeric(0)
$horrible
outlook office
0.63 0.62
The most common words in the reviews, along with the most common positive and negative words, are depicted graphically below.
c("edit","delet","via","starstarstarstarstarwork","pdt","hill","facebookshar")
[1] "edit" "delet" "via"
[4] "starstarstarstarstarwork" "pdt" "hill"
[7] "facebookshar"
GlassdoorPages %>%
unnest_tokens(word, text)%>%
inner_join(get_sentiments("bing")) %>%
count(word, sentiment, sort = TRUE) %>%
reshape2::acast(word ~ sentiment, value.var = "n", fill = 0) %>%
comparison.cloud(colors = viridis_pal(option = "D")(2),
max.words = 100)
GlassdoorPages %>%
  unnest_tokens(word, text) %>%
  inner_join(get_sentiments("nrc")) %>%
  count(word, sentiment, sort = TRUE) %>%
  spread(sentiment, n, fill = 0) %>% head(5)
Some of the words commonly used by reviewers to express positive, negative, joy, or sadness sentiments are displayed in the comparison cloud below.
GlassdoorPages %>%
unnest_tokens(word, text)%>%
inner_join(get_sentiments("nrc")) %>%
count(word, sentiment, sort = TRUE)%>%
filter(sentiment %in% c("negative","positive","joy","sadness"))%>%
reshape2::acast(word ~ sentiment, value.var = "n", fill = 0)%>%
comparison.cloud(colors = viridis_pal(option = "D")(4),
max.words = 200)
library(igraph)
library(ggraph)
tidy_descr_ngrams=GlassdoorPages %>%
unnest_tokens(word, text, token = "ngrams", n = 2) %>%
separate(word, c("word1", "word2"), sep = " ") %>%
filter(!word1 %in% stop_words$word) %>%
filter(!word2 %in% stop_words$word)%>%
filter(!word1 %in% stop_user) %>%
filter(!word2 %in% stop_user)%>%
mutate(word1 = removeNumbers(word1))%>%
mutate(word2 = removeNumbers(word2))
bigram_counts=tidy_descr_ngrams %>%
count(word1, word2, sort = TRUE)
bigram_graph =bigram_counts %>%
filter(n > 10) %>%
graph_from_data_frame()
set.seed(1)
a=grid::arrow(type = "closed", length = unit(.15, "inches"))
ggraph(bigram_graph, layout = "fr") +
geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
arrow = a, end_cap = circle(.07, 'inches')) +
geom_node_point(color = palette_light()[1], size = 5, alpha = 0.8) +
geom_node_text(aes(label = name), vjust = 1, hjust = 0.5) +
theme_void()
The most common word is “employee”, which suggests that the majority of reviewers were current or former employees. The remaining frequent words relate to management and the work environment.
data(stop_words)
tidy_descr<-GlassdoorPages %>%
unnest_tokens(word, text) %>%
mutate(word=removeNumbers(word))%>%
mutate(word_stem = wordStem(word)) %>%
anti_join(stop_words, by = "word") %>%
filter(!word_stem %in% stop_words$word) %>%
filter(!word_stem %in% stop_user)
tidy_descr %>%
count(word_stem, sort = TRUE) %>%
filter(n > 30) %>%
ggplot(aes(x = reorder(word_stem, n), y = n)) +
geom_col(color = palette_light()[1], fill = palette_light()[1], alpha = 0.8) +
coord_flip() +
theme_tq() +
labs(x = "",
y = "count of most common words",titlt="Count of most common words")
tidy_descr %>%
count(word_stem) %>%
mutate(word_stem = removeNumbers(word_stem)) %>%
with(wordcloud(word_stem, n, max.words = 100, colors = palette_light()))
bigram_counts %>%
mutate(word1 = removeNumbers(word1))%>%
mutate(word2 = removeNumbers(word2))%>%
filter(n > 20) %>%
ggplot(aes(x = reorder(word1,-n), y = reorder(word2,-n), fill = n)) +
geom_tile(alpha = 0.8, color = "white") +
scale_fill_gradientn(colours = c(palette_light()[[1]], palette_light()[[2]])) +
coord_flip() +
theme_tq() +
theme(legend.position = "right") +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
labs(x = "first word in pair",
y = "second word in pair")
tidy_descr_ngrams=GlassdoorPages %>%
unnest_tokens(word, text, token = "ngrams", n = 2)%>%
separate(word, c("word1", "word2"), sep = " ")%>%
mutate(word1=removeNumbers(word1))%>%
mutate(word1 = wordStem(word1))%>%
mutate(word2=removeNumbers(word2))%>%
mutate(word2 = wordStem(word2))
tidy_descr_ngrams
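The stemmed bigrams are not used further above; a natural follow-up sketch is to count the stemmed pairs after dropping stop words, mirroring the earlier bigram counts:
tidy_descr_ngrams %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word) %>%
  count(word1, word2, sort = TRUE)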
tidy_FCA %>%
inner_join(get_sentiments("bing"))%>%
group_by(sentiment)%>%count()%>%
ggplot(aes(x = reorder(sentiment, n), y = n,fill=palette_light()[1])) +
geom_col( alpha = 0.8,width = 0.5) +
coord_flip() +
theme_tq()+
labs(y="sentiments ",title="bing lexicon sentiment count" ,x="frequency")+
theme(legend.position="none")+
#scale_fill_viridis(end = 0.85, discrete=TRUE, direction = 1,option = "D")
#scale_fill_manual(values=viridis_pal(option = "A")(2))
scale_fill_tq()
tidy_FCA %>%
inner_join(get_sentiments("nrc"))%>%
group_by(sentiment)%>%count()%>%
ggplot(aes(x = reorder(sentiment, n), y = n,fill=palette_light()[1])) +
geom_col( alpha = 0.8) +
coord_flip() +
theme_tq()+
labs(y="sentiments ",title="nrc lexicon sentiment count" ,x="frequency")+
theme(legend.position="none")+
#scale_fill_viridis(end = 0.85, discrete=TRUE, direction = 1,option = "D")
#scale_fill_manual(values=viridis_pal(option = "A")(2))
scale_fill_tq()